Diet (dis)-similarity

Author

Max Lindmark

Published

2024-03-22

Load packages

# Project root (used to build all data/figure paths below)
home <- here::here()

# Load libraries
# Fix: RCurl was loaded twice (duplicate library() call removed)
library(tidyverse)
library(tidylog)
library(RCurl)
library(gllvm)
library(RColorBrewer)
library(ggrepel)
library(vegan)
library(patchwork)
library(devtools)
library(ggpubr)
library(janitor)
library(brms)
library(modelr)
library(tidybayes)
library(ggstats)

# Source map-plot helpers from the project repo
# (presumably defines plot_map and theme_sleek used below -- confirm)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")
#source(paste0(home, "R/functions/map-plot.R"))

# Source code for rotating ordination plot (to make it work in ggplot)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/rotate.R")
#source(paste0(home, "R/functions/rotate.R"))

Read data

d <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv"))

Diet similarity using bar plots and ordination

Ordination using gllvm

# Keep only the prey-weight columns (all end in "_tot")
d2 <- d %>%
  dplyr::select(ends_with("_tot"))

# Re-attach the predator id so predator metadata can be joined back
d2$pred_id <- d$pred_id

# Add predator length, weight and species (1:1 join on pred_id)
d2 <- d2 %>%
  left_join(d %>% dplyr::select(pred_length_cm, pred_weight_g, pred_id, species), by = "pred_id")
left_join: added 3 columns (pred_length_cm, pred_weight_g, species)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     9,649
           >                 =======
           > rows total       9,649
# Calculate relative prey weight and average by size-class
d3 <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  # Weight-specific prey weight: g prey per g predator
  mutate(value = value/pred_weight_g) %>% 
  filter(pred_length_cm >= 10 & pred_length_cm <= 50) %>% # These are the sizes we work with
  #filter(value < quantile(value, probs = 0.995)) %>% 
  # mutate(value = ifelse(value > quantile(value, probs = 0.975),
  #                       quantile(value, probs = 0.975),
  #                       value)) %>%
  # 2-cm size bins; keep the lower bin edge as a numeric size label.
  # cut_width() labels look like "(9,11]"; the first (closed) interval is
  # labelled "[9,11]", hence the "[9" special case below
  mutate(pred_length_cm = cut_width(pred_length_cm, 2),
         pred_length_cm = str_remove(pred_length_cm, "[(]")) %>% 
  separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) %>% 
  mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
         pred_length_cm = as.numeric(pred_length_cm)) %>% 
  dplyr::select(-scrap) %>% 
  group_by(species, pred_length_cm, name) %>% 
  summarise(tot_prey_weight = mean(value)) %>%
  ungroup() %>% 
  # Prettify prey names: "clupea_harengus_tot" -> "Clupea Harengus"
  mutate(name = str_replace(name, "_", " "),
         name = str_replace(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name))
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 7,065 rows (5%), 137,670 rows remaining
summarise: now 585 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
d3 %>% filter(tot_prey_weight > 0) %>% arrange(tot_prey_weight)
filter: removed 174 rows (30%), 411 rows remaining
# A tibble: 411 × 4
   species  pred_length_cm name            tot_prey_weight
   <chr>             <dbl> <chr>                     <dbl>
 1 Cod                  43 Polychaeta         0.0000000895
 2 Cod                  35 Non Bio            0.000000214 
 3 Cod                  47 Amphipoda          0.000000283 
 4 Cod                  45 Polychaeta         0.000000292 
 5 Cod                  41 Bivalvia           0.000000365 
 6 Flounder             33 Mysidae            0.000000366 
 7 Cod                  43 Amphipoda          0.000000406 
 8 Cod                  45 Bivalvia           0.000000930 
 9 Flounder             25 Clupea Harengus    0.00000102  
10 Cod                  41 Amphipoda          0.00000104  
# ℹ 401 more rows
d_wide <- d3 %>% pivot_wider(names_from = name, values_from = tot_prey_weight)
pivot_wider: reorganized (name, tot_prey_weight) into (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) [was 585x4, now 39x17]
# Response matrix for gllvm: rows = species x size-class, cols = prey groups
d_mat <- d_wide %>%
  dplyr::select(-species, -pred_length_cm) %>%
  as.matrix()

nrow(d_mat)
[1] 39
# Fit gllvm model with two latent variables and no covariates, meaning it becomes an ordination
set.seed(999)

# Tweedie family suits the zero-inflated, continuous prey weights
fit <- gllvm(y = d_mat, family = "tweedie", num.lv = 2)
Note that, the tweedie family is implemented using the extended variational approximation method. 
fit
Call: 
gllvm(y = d_mat, family = "tweedie", num.lv = 2)
family: 
[1] "tweedie"
method: 
[1] "VA"

log-likelihood:  2249.647 
Residual degrees of freedom:  526 
AIC:  -4381.294 
AICc:  -4367.808 
BIC:  -4123.369 
# Let's do a ggplot-residual QQ plot instead of gllvm's base-graphics one
res <- residuals(fit)
p <- NCOL(fit$y)
sppind <- 1:p
# Residual matrix, one column per prey group
ds_res <- res$residuals[, sppind]

# Compare with built-in plot from gllvm: plot(fit, which = 2)
ds_res %>% 
  as.data.frame() %>% 
  pivot_longer(cols = everything()) %>% 
  ggplot(aes(sample = value)) + 
  geom_qq(size = 0.8) + 
  geom_qq_line() + 
  labs(y = "Sample quantiles",
       x = "Theoretical quantiles")
pivot_longer: reorganized (Amphipoda, Bivalvia, Clupea Harengus, Clupeidae, Gadus Morhua, …) into (name, value) [was 39x15, now 585x2]

ggsave(paste0(home, "/figures/supp/qq_ordi_gllvm.pdf"), width = 11, height = 11, units = "cm")

# Plot with GGplot
# Now make the ordination plot. We want to use ggplot, so need to hack it a bit
# See this thread: https://github.com/JenniNiku/gllvm/discussions/116
# ordiplot(fit, biplot = TRUE)

# Extract site scores
LVs = as.data.frame(getLV(fit))

# Bind LV site scores to metadata
# LVs = cbind(LVs, sim.meta)
# 
# ordiplot(fittw, biplot = TRUE)
# 
# ordiplot(fittw, biplot = TRUE, symbols = TRUE, spp.colors = "white")
# 
# rotate() was sourced above; presumably returns rotated species and site
# scores matching ordiplot's orientation -- confirm against rotate.R
LVs2 <- rotate(fit)

# Prey (species) scores with prey names as a column, for labelling
labs <- LVs2$species %>%
  as.data.frame() %>% 
  rownames_to_column(var = "prey")

# Site scores (one row per species x size-class), classified into three
# groups: flounder, small cod (<= 25 cm) and large cod (> 25 cm)
dd <- LVs2$sites %>%
  as.data.frame() %>% 
  bind_cols(d_wide %>% dplyr::select(species, pred_length_cm)) %>% 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <=  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group))

# Alternative palettes for the groups
pal <- (brewer.pal(n = 11, name = "RdYlBu")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 11, name = "RdYlGn")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 8, name = "Dark2")[c(8, 7, 6)])
#pal <- brewer.pal(n = 8, name = "Dark2")[c(2, 7, 6)]

# Ordination biplot: sites (points, sized by predator length, coloured by
# group) plus prey labels from the rotated species scores
ordi <- ggplot(dd, aes(x = V1, y = V2, color = factor(group, levels = c("Flounder", "Small cod", "Large cod")),
                       size = pred_length_cm)) +
  geom_point(alpha = 0.6) + 
  # stat_ellipse(aes(V1, V2, color = group), 
  #              inherit.aes = FALSE, linewidth = 1, alpha = 0.3) + 
  scale_radius(range = c(0.8, 4)) +
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) + 
  labs(size = "Predator length [cm]", shape = "Species", fill = "Group", color = "",
       x = "Latent variable 1", y = "Latent variable 2") +
  geom_label_repel(data = labs, aes(V1, V2, label = prey),
                   color = "grey20", inherit.aes = FALSE, size = 2, alpha = 0.4,
                   box.padding = 0.25) +
  theme_sleek() +
  coord_cartesian(xlim = c(-3, 4), ylim = c(-3, 3)) + 
  guides(color = guide_legend(ncol = 4),
         size = guide_legend(ncol = 4)) + 
  theme(legend.position = "bottom",
        legend.box = "vertical",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ordi

Make a plot of the ontogenetic development of diets

# Calculate relative prey weight and average by size-class
# NOTE(review): this overwrites the d3 from the ordination section. Unlike
# there, no size filter is applied here, and the 99.5 % outlier filter is
# applied AFTER averaging rather than before -- confirm intended
d3 <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  mutate(value = value/pred_weight_g) %>% 
  group_by(species, pred_length_cm, name) %>% 
  summarise(tot_prey_weight = mean(value)) %>%
  ungroup() %>% 
  mutate(name = str_replace(name, "_", " "),
         name = str_replace(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name)) %>% 
  filter(tot_prey_weight < quantile(tot_prey_weight, probs = 0.995))
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
summarise: now 1,545 rows and 4 columns, 2 group variables remaining (species, pred_length_cm)
ungroup: no grouping variables
filter: removed 8 rows (1%), 1,537 rows remaining
# Pool all cod above this length into the largest 5-cm size bin
max_size_cod <- 65

# Diet proportions per prey group within each 5-cm cod size bin
cod_important_prey <- d3 %>%
  filter(species == "Cod") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod - 1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(name, predator_length_grp) %>%
  summarise(prey_group_tot = sum(tot_prey_weight)) %>% 
  ungroup() %>% 
  group_by(predator_length_grp) %>% 
  mutate(prop = prey_group_tot / sum(prey_group_tot)) %>% 
  ungroup() %>%
  # Parse the upper bin edge from cut() labels like "(10,15]".
  # Fix: the old substr(x, 5, 6) broke for labels of other widths --
  # "(0,5]" and "(5,10]" gave NA (hence the coercion warning and manual
  # patches), and "(95,100]" would silently give 10. A regex on the text
  # after the comma works for every label.
  mutate(max_size = as.numeric(sub("^\\(.*,([0-9]+)\\]$", "\\1", predator_length_grp)),
         predator = "Cod")
filter: removed 509 rows (33%), 1,028 rows remaining
summarise: now 195 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
# Pool all flounder above this length into the largest 5-cm size bin
max_size_fle <- 40

# Diet proportions per prey group within each 5-cm flounder size bin
fle_important_prey <- d3 %>%
  filter(species == "Flounder") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle - 1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(name, predator_length_grp) %>%
  summarise(prey_group_tot = sum(tot_prey_weight)) %>% 
  ungroup() %>% 
  group_by(predator_length_grp) %>% 
  mutate(prop = prey_group_tot / sum(prey_group_tot)) %>% 
  ungroup() %>%
  # Parse the upper bin edge from cut() labels like "(10,15]".
  # Fix: the old substr(x, 5, 6) broke for labels of other widths --
  # "(0,5]" and "(5,10]" gave NA (hence the coercion warning and manual
  # patches). A regex on the text after the comma works for every label.
  mutate(max_size = as.numeric(sub("^\\(.*,([0-9]+)\\]$", "\\1", predator_length_grp)),
         predator = "Flounder")
filter: removed 1,028 rows (67%), 509 rows remaining
summarise: now 105 rows and 3 columns, one group variable remaining (name)
ungroup: no grouping variables
ungroup: no grouping variables
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
# Combine species; a missing proportion means that prey group was absent
# in that size bin, so treat it as zero
area_plot <- bind_rows(fle_important_prey, cod_important_prey) %>% 
  mutate(prop = replace_na(prop, 0))

# Number of distinct prey categories (one colour each)
n_cat <- nrow(area_plot %>% distinct(name))
distinct: removed 285 rows (95%), 15 rows remaining
# Extend the 12-colour "Paired" palette to n_cat colours by interpolation
colourCount <- n_cat
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
pal2 <- getPalette(colourCount)

area_plot %>% distinct(name)
distinct: removed 285 rows (95%), 15 rows remaining
# A tibble: 15 × 1
   name              
   <chr>             
 1 Amphipoda         
 2 Bivalvia          
 3 Clupea Harengus   
 4 Clupeidae         
 5 Gadus Morhua      
 6 Gobiidae          
 7 Mysidae           
 8 Non Bio           
 9 Other             
10 Other Crustacea   
11 Other Pisces      
12 Platichthys Flesus
13 Polychaeta        
14 Saduria Entomon   
15 Sprattus Sprattus 
# Dataframes for geom_text with sample size
# Number of unique cod stomachs per 5-cm size bin (same binning as above)
n_cod <- d2 %>%
  filter(species == "Cod") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod -1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(predator_length_grp) %>%
  summarise(n = length(unique(pred_id)))
filter: removed 3,882 rows (40%), 5,767 rows remaining
summarise: now 13 rows and 2 columns, ungrouped
# Number of unique flounder stomachs per 5-cm size bin
n_fle <- d2 %>%
  filter(species == "Flounder") %>% 
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle-1, pred_length_cm)) %>% 
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) %>% 
  group_by(predator_length_grp) %>%
  summarise(n = length(unique(pred_id)))
filter: removed 5,767 rows (60%), 3,882 rows remaining
summarise: now 7 rows and 2 columns, ungrouped
# Combine sample sizes and parse the upper bin edge from "(10,15]"-style
# cut() labels. Fix: the old substr(x, 5, 6) returned NA for e.g. "(5,10]"
# (coercion warning + manual patches); a regex handles every label width.
n_dat <- bind_rows(n_cod %>% mutate(predator = "Cod"), 
                   n_fle %>% mutate(predator = "Flounder")) %>% 
  mutate(max_size = as.numeric(sub("^\\(.*,([0-9]+)\\]$", "\\1", predator_length_grp)))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `max_size = as.numeric(substr(predator_length_grp, 5, 6))`.
Caused by warning:
! NAs introduced by coercion
# Stacked bar plot of diet proportions by predator size, faceted by species
bar_diet <- 
  ggplot(data = area_plot, aes(x = max_size, y = prop, fill = name, color = name)) +
  geom_col(width = 4.3) +
  # Invisible layer (size 0, white) at y = 1.08: presumably reserves
  # headroom so the sample-size labels below are not clipped -- confirm
  geom_text(data = n_dat, aes(x = max_size, y = 1.08, label = n), inherit.aes = FALSE,
            size = 0, color = "white") +
  # Sample size (number of stomachs) printed above each bar
  geom_text(data = n_dat, aes(x = max_size, y = 1.04, label = n), inherit.aes = FALSE,
            size = 2) +
  facet_wrap(~predator, scales = "free") +
  scale_fill_manual(values = pal2, name = "") +
  scale_color_manual(values = pal2, name = "") +
  coord_cartesian(expand = 0) +
  scale_x_continuous(breaks = seq(0, 100, 5)) +
  labs(y = "Proportion", x = "Max. predator size in group [cm]") +
  theme(legend.key.size = unit(0.2, 'cm'),
        legend.position = "bottom",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

Combine plots

# Stack the diet bar plot over the ordination; tag panels A/B
bar_diet / ordi + plot_annotation(tag_levels = "A") #+ plot_layout(heights = c(1, 1.6))

ggsave(paste0(home, "/figures/ordi_diet.pdf"), width = 17, height = 21, units = "cm")

Schoener’s overlap index

# Calculate relative prey weight and average by size-class
# Total prey weight and number of stomachs per year x ICES rectangle x
# predator group (flounder / small cod / large cod)
year_rect_sum <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  # NOTE(review): strict bounds here (> 10, < 50) vs inclusive bounds
  # (>= 10, <= 50) used for the ordination above -- confirm intended
  filter(pred_length_cm > 10 & pred_length_cm < 50) %>% # These are the sizes we work with
  left_join(d %>% dplyr::select(year, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") %>% 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <=  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) %>% 
  group_by(year, ices_rect, group) %>%
  summarise(n = length(unique(pred_id)),
            tot_prey = sum(value)) %>% 
  ungroup() %>% 
  group_by(year, ices_rect) %>% 
  # Smallest group sample size within each year x rectangle
  mutate(min_stom = min(n)) %>% 
  ungroup() %>%
  mutate(id = paste(year, ices_rect, group, sep = "_")) %>% 
  dplyr::select(-year, -ices_rect, -group) %>% 
  # Require at least 4 stomachs per group
  filter(n > 3)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 8,475 rows (6%), 136,260 rows remaining
left_join: added 5 columns (year, subdiv, ices_rect, X, Y)
           > rows only in x         0
           > rows only in y  (    565)
           > matched rows     136,260
           >                 =========
           > rows total       136,260
summarise: now 280 rows and 5 columns, 2 group variables remaining (year, ices_rect)
ungroup: no grouping variables
ungroup: no grouping variables
filter: removed 34 rows (12%), 246 rows remaining
# year_rect_sum %>% arrange(n) %>% as.data.frame()
# year_rect_sum %>% arrange(min_stom) %>% as.data.frame()
# summary(year_rect_sum$n)

# Diet proportions per prey group within each year x rectangle x predator
# group, keeping only the ids that passed the sample-size filter above
diet_prop <- d2 %>% 
  pivot_longer(ends_with("_tot")) %>% 
  filter(pred_length_cm > 10 & pred_length_cm < 50) %>% # These are the sizes we work with
  left_join(d %>% dplyr::select(year, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") %>% 
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <=  25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) %>% 
  group_by(year, ices_rect, group, name) %>%
  summarise(tot_prey_by_grp = sum(value)) %>% 
  ungroup() %>%
  mutate(id = paste(year, ices_rect, group, sep = "_")) %>% 
  filter(id %in% unique(year_rect_sum$id)) %>% 
  left_join(year_rect_sum, by = "id") %>% 
  # Proportion of total prey weight contributed by each prey group
  mutate(prop_prey = tot_prey_by_grp / tot_prey)
pivot_longer: reorganized (amphipoda_tot, bivalvia_tot, clupeidae_tot, clupea_harengus_tot, gadus_morhua_tot, …) into (name, value) [was 9649x19, now 144735x6]
filter: removed 8,475 rows (6%), 136,260 rows remaining
left_join: added 5 columns (year, subdiv, ices_rect, X, Y)
           > rows only in x         0
           > rows only in y  (    565)
           > matched rows     136,260
           >                 =========
           > rows total       136,260
summarise: now 4,200 rows and 5 columns, 3 group variables remaining (year, ices_rect, group)
ungroup: no grouping variables
filter: removed 510 rows (12%), 3,690 rows remaining
left_join: added 3 columns (n, tot_prey, min_stom)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     3,690
           >                 =======
           > rows total       3,690
# Calculate overlap by year and ices rect and species group. Make the proportions wide!
#unique(diet_prop$group)
# Schoener's overlap index for each group pair:
# D = 1 - 0.5 * sum_i |p_i1 - p_i2|, with p_i the diet proportion of prey i
diet_prop_wide <- diet_prop %>% 
  dplyr::select(-tot_prey_by_grp, -tot_prey, -id, -n) %>% 
  pivot_wider(names_from = group, values_from = prop_prey) %>% 
  mutate(abs_f_sc = abs(Flounder - `Small cod`),
         abs_f_lc = abs(Flounder - `Large cod`),
         abs_lc_sc = abs(`Large cod` - `Small cod`)) %>% 
  group_by(year, ices_rect) %>% 
  summarise(`Flounder\nSmall cod` = 1 - 0.5*sum(abs_f_sc),
            `Flounder\nLarge cod` = 1 - 0.5*sum(abs_f_lc),
            `Small cod\nLarge cod` = 1 - 0.5*sum(abs_lc_sc))
pivot_wider: reorganized (group, prop_prey) into (Flounder, Large cod, Small cod) [was 3690x6, now 1500x7]
summarise: now 100 rows and 5 columns, one group variable remaining (year)
# Rectangle mid-point coordinates from the ICES rectangle codes
diet_prop_wide$latitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lat
diet_prop_wide$longitude <- mapplots::ices.rect(diet_prop_wide$ices_rect)$lon

# Add UTM (km) X/Y columns; default ll_names are longitude/latitude
diet_prop_wide <- sdmTMB::add_utm_columns(diet_prop_wide)
Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
# Long format: one row per year x rectangle x group pair. drop_na() removes
# pairs where one of the two groups was absent in that year x rectangle
ovr <- diet_prop_wide %>%
  pivot_longer(c(`Flounder\nSmall cod`, `Flounder\nLarge cod`, `Small cod\nLarge cod`),
               names_to = "overlap_group", values_to = "overlap") %>% 
  drop_na(overlap)
pivot_longer: reorganized (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) into (overlap_group, overlap) [was 100x9, now 300x8]
drop_na (grouped): removed 94 rows (31%), 206 rows remaining
ovr
# A tibble: 206 × 8
# Groups:   year [8]
    year ices_rect latitude longitude     X     Y overlap_group          overlap
   <dbl> <chr>        <dbl>     <dbl> <dbl> <dbl> <chr>                    <dbl>
 1  2015 40G4          55.8      14.5  469. 6178. "Flounder\nSmall cod"  0.0721 
 2  2015 40G4          55.8      14.5  469. 6178. "Flounder\nLarge cod"  0.0210 
 3  2015 40G4          55.8      14.5  469. 6178. "Small cod\nLarge cod" 0.0346 
 4  2015 40G6          55.8      16.5  594. 6179. "Flounder\nSmall cod"  0.00160
 5  2015 40G6          55.8      16.5  594. 6179. "Flounder\nLarge cod"  0.00240
 6  2015 40G6          55.8      16.5  594. 6179. "Small cod\nLarge cod" 0.347  
 7  2015 41G7          56.2      17.5  655. 6237. "Flounder\nSmall cod"  0.230  
 8  2015 41G7          56.2      17.5  655. 6237. "Flounder\nLarge cod"  0.0948 
 9  2015 41G7          56.2      17.5  655. 6237. "Small cod\nLarge cod" 0.222  
10  2015 41G8          56.2      18.5  717. 6239. "Flounder\nSmall cod"  0.105  
# ℹ 196 more rows
# Plot diet overlap
# plot_map +
#   geom_sf(color = "gray80") + 
#   geom_point(data = ovr, aes(X*1000, Y*1000, color = overlap),
#              size = 10) + 
#   viridis::scale_color_viridis() +
#   geom_sf() + 
#   facet_wrap(~overlap_group, ncol = 2) + 
#   NULL
  
# Jittered points (one per year x rectangle) with a boxplot per group pair;
# seed fixes the jitter for reproducibility
set.seed(99)
ps <- ggplot(ovr, aes(overlap_group, overlap, color = ices_rect)) + 
  geom_jitter(height = 0, width = 0.1, alpha = 0.3) + 
  geom_boxplot(aes(overlap_group, overlap),
               inherit.aes = FALSE, fill = NA, width = 0.2, alpha = 0.2, size = 0.6) + 
  viridis::scale_color_viridis(discrete = TRUE, name = "ICES\nrectangle") + 
  # Dashed reference at 0.6 -- presumably the conventional threshold for
  # "biologically significant" overlap; confirm citation
  geom_hline(yintercept = 0.6, linetype = 2, alpha = 0.6) + 
  labs(y = "Schoener's overlap index",
       x = "") + 
  theme(legend.position = "bottom",
        legend.key.size = unit(0.01, "cm"),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ps

# summary(ovr$overlap)
# 
# Cases with zero overlap (completely disjoint diets)
ovr %>% filter(overlap == 0)
filter (grouped): removed 202 rows (98%), 4 rows remaining
# A tibble: 4 × 8
# Groups:   year [3]
   year ices_rect latitude longitude     X     Y overlap_group         overlap
  <dbl> <chr>        <dbl>     <dbl> <dbl> <dbl> <chr>                   <dbl>
1  2018 42G6          56.8      16.5  592. 6291. "Flounder\nLarge cod"       0
2  2021 43G6          57.2      16.5  591. 6346. "Flounder\nSmall cod"       0
3  2021 43G6          57.2      16.5  591. 6346. "Flounder\nLarge cod"       0
4  2022 42G6          56.8      16.5  592. 6291. "Flounder\nLarge cod"       0

How does the diet overlap relate to biomass density of the species?

Trawl data

## Read in trawl data. There's one file for cod and one for flounder
# They go up to 2020 Q1
# trawl_surveys_cod <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv", sep = ";")
# trawl_surveys_fle <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv", sep = ";")

# Semicolon-delimited length-class ("Zincl (L)") trawl survey data
trawl_surveys_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv"), delim = ";")
Rows: 9199 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr  (10): Species, Index, Appr. status, Validity, Station name, ICES, Dist....
dbl  (13): Year, Quarter, Month, Seqno, Haul, Subsam, Processing, Size, Subd...
num   (7): Lat, Long, Bottom depth, Doorspread, Opening, Tot. no./hour, No./...
lgl   (4): Sex, Headropedepth, DNA sampleID, NumberToSample
date  (1): Date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_fle <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv"), delim = ";")
Rows: 7306 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr  (10): Species, Index, Appr. status, Validity, Station name, ICES, Dist....
dbl  (13): Year, Quarter, Month, Seqno, Haul, Subsam, Processing, Size, Subd...
num   (7): Lat, Long, Bottom depth, Doorspread, Opening, Tot. no./hour, No./...
lgl   (4): Sex, Headropedepth, DNA sampleID, NumberToSample
date  (1): Date

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Combine for the two species, filter and clean!
trawl_data <- rbind(trawl_surveys_cod, trawl_surveys_fle) %>%
  clean_names() %>%
  # These columns use decimal commas in the raw files; convert to numeric
  mutate(swept_area = as.numeric(gsub(",", "\\.", swept_area)),
         kg_hour = as.numeric(gsub(",", "\\.", kg_hour)),
         dist = as.numeric(gsub(",", "\\.", dist))) %>% 
  as.data.frame()

str(trawl_data)
'data.frame':   16505 obs. of  35 variables:
 $ species            : chr  "cod" "cod" "cod" "cod" ...
 $ date               : Date, format: "2015-02-26" "2015-02-26" ...
 $ year               : num  2015 2015 2015 2015 2015 ...
 $ quarter            : num  1 1 1 1 1 1 1 1 1 1 ...
 $ month              : num  2 2 2 2 2 2 2 2 2 2 ...
 $ index              : chr  "OXBH.2015.2" "OXBH.2015.2" "OXBH.2015.2" "OXBH.2015.2" ...
 $ appr_status        : chr  "Locked" "Locked" "Locked" "Locked" ...
 $ seqno              : num  10048 10048 10048 10048 10048 ...
 $ haul               : num  2 2 2 2 2 2 2 2 2 2 ...
 $ validity           : chr  "V" "V" "V" "V" ...
 $ station_name       : chr  "SLAGGENABBEN" "SLAGGENABBEN" "SLAGGENABBEN" "SLAGGENABBEN" ...
 $ subsam             : num  0 0 0 0 0 0 0 0 0 0 ...
 $ processing         : num  0 0 0 0 0 0 0 0 0 0 ...
 $ size               : num  9 9 9 9 9 9 9 9 9 9 ...
 $ sex                : logi  NA NA NA NA NA NA ...
 $ subdiv             : num  25 25 25 25 25 25 25 25 25 25 ...
 $ rect               : num  3959 3959 3959 3959 3959 ...
 $ ices               : chr  "39G4" "39G4" "39G4" "39G4" ...
 $ lat                : num  55289 55289 55289 55289 55289 ...
 $ long               : num  143628 143628 143628 143628 143628 ...
 $ bottom_depth       : num  603 603 603 603 603 603 603 603 603 603 ...
 $ duration           : num  30 30 30 30 30 30 30 30 30 30 ...
 $ dist               : num  1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 1.49 ...
 $ doorspread         : num  875 875 875 875 875 875 875 875 875 875 ...
 $ opening            : num  56 56 56 56 56 56 56 56 56 56 ...
 $ headropedepth      : logi  NA NA NA NA NA NA ...
 $ swept_area         : num  0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 0.483 ...
 $ kg_hour            : num  1606 1606 1606 1606 1606 ...
 $ tot_no_hour        : num  57467 57467 57467 57467 57467 ...
 $ lengthcl           : num  200 210 220 230 240 250 260 270 280 290 ...
 $ no_hour            : num  552 368 736 184 1472 ...
 $ length_measure_type: chr  "Total length TL" "Total length TL" "Total length TL" "Total length TL" ...
 $ dna_sample_id      : logi  NA NA NA NA NA NA ...
 $ number_to_sample   : logi  NA NA NA NA NA NA ...
 $ smhi_serial_no     : num  1 1 1 1 1 1 1 1 1 1 ...
sort(unique(trawl_data$lat)) %>% head(20)
 [1] 5541 5542 5608 5613 5619 5629 5632 5634 5703 5706 5712 5717 5721 5722 5723
[16] 5728 5729 5743 5746 5750
# For these coordinates, we can use the function Fede provided:
# Convert DDMM-format coordinates (e.g. 5541 = 55 deg 41 min) to decimal
# degrees, preserving sign. Values with fewer than 4 digits are left-padded
# with zeros before splitting into degrees (chars 1-2) and minutes (chars 3-4).
format.position <- function(x){
  sign.x <- sign(x)
  x <- abs(x)
  # paste0 instead of paste(sep = "") (idiom; same result)
  x <- ifelse(nchar(x) == 3, paste0("0", x), x)
  x <- ifelse(nchar(x) == 2, paste0("00", x), x)
  x <- ifelse(nchar(x) == 1, paste0("000", x), x)
  dec.x <- as.numeric(substring(x, 1, 2)) + as.numeric(substring(x, 3, 4)) / 60
  # Explicit return value: the original ended in a bare assignment, whose
  # value is returned invisibly -- worked when assigned, but printed nothing
  sign.x * dec.x
}

# Apply function
# NOTE(review): lat is overwritten in place, so re-running this chunk would
# double-convert the coordinates -- not idempotent
trawl_data$lat <- format.position(trawl_data$lat)
trawl_data$lon <- format.position(trawl_data$long)

# Add UTM (km) X/Y columns from the decimal-degree coordinates
trawl_data <- sdmTMB::add_utm_columns(trawl_data, ll_names = c("lon", "lat"))
Warning: Multiple UTM zones detected.
Proceeding with the most common value.
You may wish to choose a different projection.
Proceeding with UTM zone 33N; CRS = 32633.
Visit https://epsg.io/32633 to verify.
# SMHI serial no?
# NOTE(review): `t` masks base::t() -- consider a more descriptive name
t <- trawl_data %>% drop_na(smhi_serial_no)
# Map the haul positions by year
plot_map + 
  geom_point(data = trawl_data, aes(X*1000, Y*1000, color = factor(year)),
             alpha = 0.5, size = 0.3) +
  theme_sleek(base_size = 8)
Warning: `guide_colourbar()` needs continuous scales.
Warning: Removed 75 rows containing missing values or values outside the scale range
(`geom_point()`).

Read and join the biological data

trawl_surveys_s_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) COD.csv"), delim = ";")
Rows: 10381 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr   (9): Appr. status, Sampling type, Validity, Station name, ICES, Specie...
dbl  (22): Year, Quarter, Month, Trip No, Seqno, Haul, Subsam, Processing, S...
lgl   (1): Maturity No (SMSF)
date  (1): Date  

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
trawl_surveys_s_flounder <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) FLE.csv"), delim = ";")
Rows: 13946 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
chr   (9): Appr. status, Sampling type, Validity, Station name, ICES, Specie...
dbl  (20): Year, Quarter, Month, Trip No, Seqno, Haul, Subsam, Processing, S...
lgl   (3): Age quality, Age grading, Maturity No (SMSF)
date  (1): Date  

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Harmonise the species labels before combining
trawl_surveys_s_cod$Species <- "Cod"
trawl_surveys_s_flounder$Species <- "Flounder"

bio_data <- bind_rows(trawl_surveys_s_cod, trawl_surveys_s_flounder) %>%
  clean_names()

# Join the trawl, bio and stomach data. First create a unique ID.
# In earlier versions I had a column called otolith number (fish ID!), which was really fish id, but it isn't here anymore.

# Add a new species code in the trawl data that matches the stomach data 
# (note the differing case of `species` in the two sources)
trawl_data <- trawl_data %>% mutate(predator_code = ifelse(species == "cod", "COD", "FLE"))

bio_data <- bio_data %>% mutate(predator_code = ifelse(species == "Cod", "COD", "FLE"))

# Check the ID components contain no NAs before pasting them together
unique(is.na(trawl_data[, c("year", "quarter", "haul")]))
      year quarter  haul
[1,] FALSE   FALSE FALSE
# Go ahead
# Haul identifier: year_quarter_haul
trawl_data$haul_id <- paste(trawl_data$year,
                            trawl_data$quarter,
                            trawl_data$haul,
                            sep = "_")

# Should be a unique ID per length and predator code
# (n must be 1 everywhere; confirmed by the output below)
trawl_data %>% 
  group_by(haul_id, lengthcl, predator_code) %>% 
  mutate(n = n()) %>% 
  ungroup() %>% 
  distinct(n)
ungroup: no grouping variables
distinct: removed 16,504 rows (>99%), one row remaining
# A tibble: 1 × 1
      n
  <int>
1     1
# Now we want the cpue of a species-size combination as a column, then make it distinct per haul
# One row per haul with haul-level metadata only
trawl_data_unique_haul <- trawl_data %>% 
  dplyr::select(-species, -lengthcl, -predator_code, -kg_hour, -tot_no_hour, -no_hour, -length_measure_type, -sex, -long) %>% # Remove any column that has anything to do with catch, because that info should come from the species dataframes below. I.e., after left_joining this and the catch data, any 0 zero catches should be NA in the joined data
  distinct(haul_id, .keep_all = TRUE)
distinct: removed 15,851 rows (96%), 654 rows remaining
# Per-haul biomass density of cod by size group.
# lengthcl appears to be in mm (values 200, 210, ... above), so "small" =
# 10-25 cm and "medium" = 25-50 cm, matching the small/large cod split used
# in the diet analyses -- TODO confirm units
trawl_data_cod <- trawl_data %>% 
  filter(!validity == "I") %>% 
  filter(predator_code == "COD") %>% 
  mutate(kg = kg_hour * (duration / 60),
         kg_km2 = kg / swept_area) %>% # make it biomass density
  drop_na(kg_km2) %>% 
  mutate(size_group = ifelse(lengthcl >= 100 & lengthcl < 250, "small", NA),
         size_group = ifelse(lengthcl >= 250 & lengthcl < 500, "medium", size_group)) %>% 
  group_by(haul_id, size_group) %>% 
  summarise(kg_km2 = sum(kg_km2)) %>% 
  filter(size_group %in% c("small", "medium")) %>% 
  pivot_wider(names_from = size_group, values_from = kg_km2) %>% 
  rename(mcod_kg_km2 = medium,
         scod_kg_km2 = small) %>% 
  # A missing size group in a haul means zero catch of that group
  mutate(mcod_kg_km2 = replace_na(mcod_kg_km2, 0),
         scod_kg_km2 = replace_na(scod_kg_km2, 0))
filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 7,290 rows (44%), 9,183 rows remaining
drop_na: removed 169 rows (2%), 9,014 rows remaining
summarise: now 1,130 rows and 3 columns, one group variable remaining (haul_id)
filter (grouped): removed 253 rows (22%), 877 rows remaining
pivot_wider: reorganized (size_group, kg_km2) into (medium, small) [was 877x3, now 468x3]
rename: renamed 2 variables (mcod_kg_km2, scod_kg_km2)
# Ok, so because this is catches (no hauls with no fish... the only reason I have 0 catches is if one of the size-groups doesn't have a catch!)

# Check with a haul_id
# Spot-check the raw rows behind one haul's density values
trawl_data %>% 
  filter(predator_code == "COD" & haul_id == "2015_1_20") 
filter: removed 16,500 rows (>99%), 5 rows remaining
  species       date year quarter month        index appr_status seqno haul
1     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
2     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
3     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
4     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
5     cod 2015-02-28 2015       1     2 OXBH.2015.20      Locked 10057   20
  validity     station_name subsam processing size sex subdiv rect ices
1        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
2        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
3        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
4        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
5        V 11 S HOBURG BANK      0          0    9  NA     26 4163 41G8
       lat   long bottom_depth duration dist doorspread opening headropedepth
1 56.38333 182938          377       30  1.5        849       6            NA
2 56.38333 182938          377       30  1.5        849       6            NA
3 56.38333 182938          377       30  1.5        849       6            NA
4 56.38333 182938          377       30  1.5        849       6            NA
5 56.38333 182938          377       30  1.5        849       6            NA
  swept_area kg_hour tot_no_hour lengthcl no_hour length_measure_type
1      0.472   4.508          10      260       2     Total length TL
2      0.472   4.508          10      330       2     Total length TL
3      0.472   4.508          10      350       2     Total length TL
4      0.472   4.508          10      400       2     Total length TL
5      0.472   4.508          10      410       2     Total length TL
  dna_sample_id number_to_sample smhi_serial_no      lon        X        Y
1            NA               NA             19 18.48333 715.0414 6254.191
2            NA               NA             19 18.48333 715.0414 6254.191
3            NA               NA             19 18.48333 715.0414 6254.191
4            NA               NA             19 18.48333 715.0414 6254.191
5            NA               NA             19 18.48333 715.0414 6254.191
  predator_code   haul_id
1           COD 2015_1_20
2           COD 2015_1_20
3           COD 2015_1_20
4           COD 2015_1_20
5           COD 2015_1_20
trawl_data_cod
# A tibble: 468 × 3
# Groups:   haul_id [468]
   haul_id   mcod_kg_km2 scod_kg_km2
   <chr>           <dbl>       <dbl>
 1 2015_1_10   19145.         6892. 
 2 2015_1_12    7867.         2503. 
 3 2015_1_14   12351.         6444. 
 4 2015_1_15    3899.         2481. 
 5 2015_1_17    3583.         1792. 
 6 2015_1_2    38232.         8311. 
 7 2015_1_20      23.9           0  
 8 2015_1_21       0.401         0  
 9 2015_1_22     203.           18.4
10 2015_1_25      30.3          10.1
# ℹ 458 more rows
# Now do the same for flounder and join with the cod data, then with haul data!
# Different code because we do not need to worry about size (single group)
trawl_data_fle <- trawl_data %>% 
  filter(validity != "I") %>% 
  filter(predator_code == "FLE") %>% 
  mutate(kg = kg_hour * (duration / 60),
         kg_km2 = kg / swept_area) %>% # make it biomass density
  # Drop records with no density (e.g. missing swept_area), mirroring the cod
  # pipeline above. Without this, a single NA makes the whole haul sum NA,
  # which the later replace_na() would silently turn into a 0 catch.
  drop_na(kg_km2) %>% 
  mutate(size_group = ifelse(lengthcl >= 100, "benthic", "all")) %>% 
  group_by(haul_id, size_group) %>% 
  summarise(fle_kg_km2 = sum(kg_km2)) %>% 
  filter(size_group == "benthic")
filter: removed 32 rows (<1%), 16,473 rows remaining
filter: removed 9,183 rows (56%), 7,290 rows remaining
summarise: now 642 rows and 3 columns, one group variable remaining (haul_id)
filter (grouped): removed 152 rows (24%), 490 rows remaining
# Add back summaries to dataframe of unique hauls (valid hauls only)
trawl_data_unique_haul <- filter(trawl_data_unique_haul, validity != "I")
filter: removed 16 rows (2%), 638 rows remaining
trawl_data_wide <- left_join(trawl_data_unique_haul, trawl_data_cod, by = "haul_id")
left_join: added 2 columns (mcod_kg_km2, scod_kg_km2)
           > rows only in x   170
           > rows only in y  (  0)
           > matched rows     468
           >                 =====
           > rows total       638
trawl_data_wide <- left_join(trawl_data_wide, trawl_data_fle, by = "haul_id")
left_join: added 2 columns (size_group, fle_kg_km2)
           > rows only in x   148
           > rows only in y  (  0)
           > matched rows     490
           >                 =====
           > rows total       638
# Hauls with no match in the catch tables got NA from the joins; these are
# true zero catches, so recode NA -> 0 in all three density columns
trawl_data_wide <- trawl_data_wide %>%
  mutate(across(c(scod_kg_km2, mcod_kg_km2, fle_kg_km2),
                ~ replace_na(.x, 0)))

# Now add in the same ID in the bio_data
# First confirm none of the ID components is ever NA before pasting them
unique(is.na(bio_data[, c("year", "quarter", "haul")]))
      year quarter  haul
[1,] FALSE   FALSE FALSE
# Go ahead: build the same haul ID format as in the trawl data
bio_data <- bio_data %>%
  mutate(haul_id = paste(year, quarter, haul, sep = "_"))
                                
unique(bio_data$haul_id) %>% head(20)
 [1] "2015_1_94"  "2022_4_249" "2022_4_278" "2021_1_85"  "2022_1_53" 
 [6] "2018_4_9"   "2016_4_43"  "2017_1_73"  "2017_4_2"   "2017_4_4"  
[11] "2017_4_6"   "2019_4_90"  "2022_1_82"  "2016_1_55"  "2022_1_99" 
[16] "2022_4_246" "2022_4_254" "2022_4_279" "2021_1_80"  "2022_1_57" 
# Now join in trawl data into the bio data (and then into stomach data)
# List columns on both sides before joining, to spot name clashes
colnames(bio_data)
 [1] "date"                "year"                "quarter"            
 [4] "month"               "trip_no"             "appr_status"        
 [7] "sampling_type"       "seqno"               "haul"               
[10] "validity"            "station_name"        "subsam"             
[13] "processing"          "size"                "max_maturity"       
[16] "subdiv"              "rect"                "ices"               
[19] "species"             "lengthcl"            "fishno"             
[22] "age"                 "age_quality"         "age_grading"        
[25] "weight"              "sex"                 "maturity"           
[28] "maturity_no_smsf"    "stomach_sample_code" "specimen_note"      
[31] "individual_no"       "spec_tsn"            "day_night"          
[34] "predator_code"       "haul_id"            
colnames(trawl_data_wide)
 [1] "date"             "year"             "quarter"          "month"           
 [5] "index"            "appr_status"      "seqno"            "haul"            
 [9] "validity"         "station_name"     "subsam"           "processing"      
[13] "size"             "subdiv"           "rect"             "ices"            
[17] "lat"              "bottom_depth"     "duration"         "dist"            
[21] "doorspread"       "opening"          "headropedepth"    "swept_area"      
[25] "dna_sample_id"    "number_to_sample" "smhi_serial_no"   "lon"             
[29] "X"                "Y"                "haul_id"          "mcod_kg_km2"     
[33] "scod_kg_km2"      "size_group"       "fle_kg_km2"      
# Check first for overlapping columns, and if so, if one of the datasets has any NAs
common_cols <- intersect(colnames(bio_data), colnames(trawl_data_wide))

# Unique NA patterns across the shared columns in the trawl data
unique(is.na(trawl_data_wide[, common_cols]))
      date  year quarter month appr_status seqno  haul validity station_name
[1,] FALSE FALSE   FALSE FALSE       FALSE FALSE FALSE    FALSE        FALSE
[2,] FALSE FALSE   FALSE FALSE       FALSE FALSE FALSE    FALSE        FALSE
     subsam processing  size subdiv  rect  ices haul_id
[1,]  FALSE      FALSE FALSE  FALSE FALSE FALSE   FALSE
[2,]   TRUE       TRUE  TRUE  FALSE FALSE FALSE   FALSE
unique(is.na(bio_data[, common_cols]))
      date  year quarter month appr_status seqno  haul validity station_name
[1,] FALSE FALSE   FALSE FALSE       FALSE FALSE FALSE    FALSE        FALSE
     subsam processing  size subdiv  rect  ices haul_id
[1,]  FALSE      FALSE FALSE  FALSE FALSE FALSE   FALSE
# Trawl data has some NA's in the common columns. Select only the important columns
setdiff(colnames(trawl_data_wide), colnames(bio_data))
 [1] "index"            "lat"              "bottom_depth"     "duration"        
 [5] "dist"             "doorspread"       "opening"          "headropedepth"   
 [9] "swept_area"       "dna_sample_id"    "number_to_sample" "smhi_serial_no"  
[13] "lon"              "X"                "Y"                "mcod_kg_km2"     
[17] "scod_kg_km2"      "size_group"       "fle_kg_km2"      
# Keep only the haul-level variables we actually need, then join on haul ID
trawl_data_wide <- trawl_data_wide %>%
  dplyr::select(haul_id, fle_kg_km2, mcod_kg_km2, scod_kg_km2,
                lat, lon, bottom_depth, X, Y, smhi_serial_no)

bio_data <- bio_data %>%
  left_join(trawl_data_wide, by = "haul_id")
left_join: added 9 columns (fle_kg_km2, mcod_kg_km2, scod_kg_km2, lat, lon, …)
           > rows only in x        0
           > rows only in y  (   146)
           > matched rows     24,327
           >                 ========
           > rows total       24,327
names(bio_data)
 [1] "date"                "year"                "quarter"            
 [4] "month"               "trip_no"             "appr_status"        
 [7] "sampling_type"       "seqno"               "haul"               
[10] "validity"            "station_name"        "subsam"             
[13] "processing"          "size"                "max_maturity"       
[16] "subdiv"              "rect"                "ices"               
[19] "species"             "lengthcl"            "fishno"             
[22] "age"                 "age_quality"         "age_grading"        
[25] "weight"              "sex"                 "maturity"           
[28] "maturity_no_smsf"    "stomach_sample_code" "specimen_note"      
[31] "individual_no"       "spec_tsn"            "day_night"          
[34] "predator_code"       "haul_id"             "fle_kg_km2"         
[37] "mcod_kg_km2"         "scod_kg_km2"         "lat"                
[40] "lon"                 "bottom_depth"        "X"                  
[43] "Y"                   "smhi_serial_no"     

Summarise haul data at the ICES rectangle level to join into the diet data

# Mean biomass density per year and ICES rectangle, used as the spatial
# covariate for the overlap model (rename of ices done inside select)
year_rect_sum_biom <- bio_data %>%
  dplyr::select(fle_kg_km2, mcod_kg_km2, scod_kg_km2, ices_rect = ices, year) %>%
  group_by(year, ices_rect) %>%
  summarise(mean_fle = mean(fle_kg_km2),
            mean_scod = mean(scod_kg_km2),
            mean_mcod = mean(mcod_kg_km2))
rename: renamed one variable (ices_rect)
summarise: now 117 rows and 5 columns, one group variable remaining (year)
# Join the rectangle-level mean densities into the diet overlap data.
# NOTE: drop_na() without columns drops rows with an NA in ANY column,
# not only the three overlap columns -- intended here per the original log.
ovr <- ovr %>% 
  pivot_wider(names_from = overlap_group, values_from = overlap) %>% 
  drop_na() %>% 
  # Make the join keys explicit instead of relying on the natural join
  # (the original message confirms these are the shared columns)
  left_join(year_rect_sum_biom, by = c("year", "ices_rect"))
pivot_wider: reorganized (overlap_group, overlap) into (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) [was 206x8, now 82x9]
drop_na (grouped): removed 20 rows (24%), 62 rows remaining
Joining with `by = join_by(year, ices_rect)`
left_join: added 3 columns (mean_fle, mean_scod, mean_mcod)
> rows only in x 0
> rows only in y (55)
> matched rows 62
> ====
> rows total 62

Fit zero-inflated beta regressions using brms

#https://www.andrewheiss.com/blog/2021/11/08/beta-regression-guide/#zero-inflated-beta-regression-bayesian-style
  
# full model
# Reshape to long format (one row per overlap pair and observation) and build
# the scaled total-biomass covariate. Values below 1e-15 are treated as exact
# zeros so the zero-inflation part of the model can absorb them.
# NOTE(review): scale(x) / 3 standardises the summed density and then shrinks
# its SD to 1/3. If the intent was to scale the *mean* density it should be
# scale((mean_fle + mean_scod + mean_mcod) / 3), which equals scale(sum);
# the trailing / 3 only rescales the predictor (and hence its slope). Confirm.
ovr <- ovr %>%
  ungroup() %>% 
  clean_names() %>% 
  pivot_longer(c("flounder_small_cod", "flounder_large_cod", "small_cod_large_cod")) %>% 
  mutate(value = ifelse(value < 1e-15, 0, value),
         mean_biom_sc = as.numeric(scale(mean_fle + mean_scod + mean_mcod) / 3)#,
         )
ungroup: no grouping variables
pivot_longer: reorganized (flounder_small_cod, flounder_large_cod, small_cod_large_cod) into (name, value) [was 62x12, now 186x11]
# Model formula: the mean (mu, logit link), precision (phi, log link) and
# zero-inflation probability (zi, logit link) all vary by overlap pair;
# only the mean gets the biomass-density slope and its interaction
zib_model <- bf(
  value ~ name*mean_biom_sc,
  phi ~ name,
  zi ~ name,
  family = zero_inflated_beta()
)

# Fit the zero-inflated beta model (4 chains x 4000 iterations, default
# half-warmup). A fixed seed makes the MCMC draws -- and hence every posterior
# summary and figure below -- reproducible across renders.
fit <- brm(
  formula = zib_model,
  data = ovr,
  cores = 4,
  chains = 4,
  iter = 4000,
  seed = 123
)
Compiling Stan program...
Start sampling
fit
 Family: zero_inflated_beta 
  Links: mu = logit; phi = log; zi = logit 
Formula: value ~ name * mean_biom_sc 
         phi ~ name
         zi ~ name
   Data: ovr (Number of observations: 186) 
  Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
         total post-warmup draws = 8000

Population-Level Effects: 
                                     Estimate Est.Error l-95% CI u-95% CI Rhat
Intercept                               -2.52      0.16    -2.82    -2.21 1.00
phi_Intercept                            2.23      0.21     1.80     2.62 1.00
zi_Intercept                            -4.23      1.04    -6.72    -2.60 1.00
nameflounder_small_cod                   0.85      0.20     0.44     1.25 1.00
namesmall_cod_large_cod                  1.71      0.22     1.28     2.13 1.00
mean_biom_sc                            -0.28      0.38    -1.11     0.39 1.00
nameflounder_small_cod:mean_biom_sc      0.88      0.50    -0.07     1.91 1.00
namesmall_cod_large_cod:mean_biom_sc    -0.37      0.62    -1.60     0.85 1.00
phi_nameflounder_small_cod              -0.43      0.28    -0.98     0.13 1.00
phi_namesmall_cod_large_cod             -1.43      0.27    -1.95    -0.91 1.00
zi_nameflounder_small_cod               -0.02      1.51    -3.04     2.94 1.00
zi_namesmall_cod_large_cod               0.01      1.47    -2.97     3.01 1.00
                                     Bulk_ESS Tail_ESS
Intercept                                4159     4295
phi_Intercept                            4166     4209
zi_Intercept                             5847     3794
nameflounder_small_cod                   4610     5315
namesmall_cod_large_cod                  4997     5129
mean_biom_sc                             5526     4556
nameflounder_small_cod:mean_biom_sc      5991     5161
namesmall_cod_large_cod:mean_biom_sc     5891     5282
phi_nameflounder_small_cod               4937     4946
phi_namesmall_cod_large_cod              4886     5371
zi_nameflounder_small_cod                4795     4212
zi_namesmall_cod_large_cod               4959     3839

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
get_variables(fit)
 [1] "b_Intercept"                           
 [2] "b_phi_Intercept"                       
 [3] "b_zi_Intercept"                        
 [4] "b_nameflounder_small_cod"              
 [5] "b_namesmall_cod_large_cod"             
 [6] "b_mean_biom_sc"                        
 [7] "b_nameflounder_small_cod:mean_biom_sc" 
 [8] "b_namesmall_cod_large_cod:mean_biom_sc"
 [9] "b_phi_nameflounder_small_cod"          
[10] "b_phi_namesmall_cod_large_cod"         
[11] "b_zi_nameflounder_small_cod"           
[12] "b_zi_namesmall_cod_large_cod"          
[13] "lprior"                                
[14] "lp__"                                  
[15] "accept_stat__"                         
[16] "stepsize__"                            
[17] "treedepth__"                           
[18] "n_leapfrog__"                          
[19] "divergent__"                           
[20] "energy__"                              
plot(fit)

# https://discourse.mc-stan.org/t/trying-to-understand-default-prior-for-the-hurdle-part/24337
# Inspect the priors actually used: flat defaults on the coefficients,
# student_t / logistic defaults on the intercepts
priors <- prior_summary(fit)
priors
                prior     class                                 coef group resp
               (flat)         b                                                
               (flat)         b                         mean_biom_sc           
               (flat)         b               nameflounder_small_cod           
               (flat)         b  nameflounder_small_cod:mean_biom_sc           
               (flat)         b              namesmall_cod_large_cod           
               (flat)         b namesmall_cod_large_cod:mean_biom_sc           
               (flat)         b                                                
               (flat)         b               nameflounder_small_cod           
               (flat)         b              namesmall_cod_large_cod           
               (flat)         b                                                
               (flat)         b               nameflounder_small_cod           
               (flat)         b              namesmall_cod_large_cod           
 student_t(3, 0, 2.5) Intercept                                                
 student_t(3, 0, 2.5) Intercept                                                
       logistic(0, 1) Intercept                                                
 dpar nlpar lb ub       source
                       default
                  (vectorized)
                  (vectorized)
                  (vectorized)
                  (vectorized)
                  (vectorized)
  phi                  default
  phi             (vectorized)
  phi             (vectorized)
   zi                  default
   zi             (vectorized)
   zi             (vectorized)
                       default
  phi                  default
   zi                  default
stancode(fit)
// generated with brms 2.20.4
functions {
  /* zero-inflated beta log-PDF of a single response
   * Args:
   *   y: the response value
   *   mu: mean parameter of the beta distribution
   *   phi: precision parameter of the beta distribution
   *   zi: zero-inflation probability
   * Returns:
   *   a scalar to be added to the log posterior
   */
   real zero_inflated_beta_lpdf(real y, real mu, real phi, real zi) {
     row_vector[2] shape = [mu * phi, (1 - mu) * phi];
     if (y == 0) {
       return bernoulli_lpmf(1 | zi);
     } else {
       return bernoulli_lpmf(0 | zi) +
              beta_lpdf(y | shape[1], shape[2]);
     }
   }
  /* zero-inflated beta log-PDF of a single response
   * logit parameterization of the zero-inflation part
   * Args:
   *   y: the response value
   *   mu: mean parameter of the beta distribution
   *   phi: precision parameter of the beta distribution
   *   zi: linear predictor for zero-inflation part
   * Returns:
   *   a scalar to be added to the log posterior
   */
   real zero_inflated_beta_logit_lpdf(real y, real mu, real phi, real zi) {
     row_vector[2] shape = [mu * phi, (1 - mu) * phi];
     if (y == 0) {
       return bernoulli_logit_lpmf(1 | zi);
     } else {
       return bernoulli_logit_lpmf(0 | zi) +
              beta_lpdf(y | shape[1], shape[2]);
     }
   }
  // zero-inflated beta log-CCDF and log-CDF functions
  real zero_inflated_beta_lccdf(real y, real mu, real phi, real zi) {
    row_vector[2] shape = [mu * phi, (1 - mu) * phi];
    return bernoulli_lpmf(0 | zi) + beta_lccdf(y | shape[1], shape[2]);
  }
  real zero_inflated_beta_lcdf(real y, real mu, real phi, real zi) {
    return log1m_exp(zero_inflated_beta_lccdf(y | mu, phi, zi));
  }
}
data {
  int<lower=1> N;  // total number of observations
  vector[N] Y;  // response variable
  int<lower=1> K;  // number of population-level effects
  matrix[N, K] X;  // population-level design matrix
  int<lower=1> Kc;  // number of population-level effects after centering
  int<lower=1> K_phi;  // number of population-level effects
  matrix[N, K_phi] X_phi;  // population-level design matrix
  int<lower=1> Kc_phi;  // number of population-level effects after centering
  int<lower=1> K_zi;  // number of population-level effects
  matrix[N, K_zi] X_zi;  // population-level design matrix
  int<lower=1> Kc_zi;  // number of population-level effects after centering
  int prior_only;  // should the likelihood be ignored?
}
transformed data {
  matrix[N, Kc] Xc;  // centered version of X without an intercept
  vector[Kc] means_X;  // column means of X before centering
  matrix[N, Kc_phi] Xc_phi;  // centered version of X_phi without an intercept
  vector[Kc_phi] means_X_phi;  // column means of X_phi before centering
  matrix[N, Kc_zi] Xc_zi;  // centered version of X_zi without an intercept
  vector[Kc_zi] means_X_zi;  // column means of X_zi before centering
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
  for (i in 2:K_phi) {
    means_X_phi[i - 1] = mean(X_phi[, i]);
    Xc_phi[, i - 1] = X_phi[, i] - means_X_phi[i - 1];
  }
  for (i in 2:K_zi) {
    means_X_zi[i - 1] = mean(X_zi[, i]);
    Xc_zi[, i - 1] = X_zi[, i] - means_X_zi[i - 1];
  }
}
parameters {
  vector[Kc] b;  // regression coefficients
  real Intercept;  // temporary intercept for centered predictors
  vector[Kc_phi] b_phi;  // regression coefficients
  real Intercept_phi;  // temporary intercept for centered predictors
  vector[Kc_zi] b_zi;  // regression coefficients
  real Intercept_zi;  // temporary intercept for centered predictors
}
transformed parameters {
  real lprior = 0;  // prior contributions to the log posterior
  lprior += student_t_lpdf(Intercept | 3, 0, 2.5);
  lprior += student_t_lpdf(Intercept_phi | 3, 0, 2.5);
  lprior += logistic_lpdf(Intercept_zi | 0, 1);
}
model {
  // likelihood including constants
  if (!prior_only) {
    // initialize linear predictor term
    vector[N] mu = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] phi = rep_vector(0.0, N);
    // initialize linear predictor term
    vector[N] zi = rep_vector(0.0, N);
    mu += Intercept + Xc * b;
    phi += Intercept_phi + Xc_phi * b_phi;
    zi += Intercept_zi + Xc_zi * b_zi;
    mu = inv_logit(mu);
    phi = exp(phi);
    for (n in 1:N) {
      target += zero_inflated_beta_logit_lpdf(Y[n] | mu[n], phi[n], zi[n]);
    }
  }
  // priors including constants
  target += lprior;
}
generated quantities {
  // actual population-level intercept
  real b_Intercept = Intercept - dot_product(means_X, b);
  // actual population-level intercept
  real b_phi_Intercept = Intercept_phi - dot_product(means_X_phi, b_phi);
  // actual population-level intercept
  real b_zi_Intercept = Intercept_zi - dot_product(means_X_zi, b_zi);
}
# Posterior predictive check: observed density vs. replicated draws
pp_check(fit) +
  coord_cartesian(expand = TRUE) +
  labs(x = "Schoener's overlap index", y = "Density") +
  guides(color = guide_legend(position = "inside")) +
  theme(legend.position.inside = c(0.8, 0.8))
Using 10 posterior draws for ppc type 'dens_overlay' by default.
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.

ggsave(paste0(home, "/figures/supp/schoener_zib_pp_check.pdf"), width = 11, height = 11, units = "cm")

# Quick look at the default conditional-effects plots
conditional_effects(fit)

set.seed(123)

# Human-readable two-line labels for the three overlap pairs
ovr <- ovr %>%
  mutate(name2 = case_when(name == "flounder_large_cod" ~ "Flounder\nLarge cod",
                           name == "flounder_small_cod" ~ "Flounder\nSmall cod",
                           name == "small_cod_large_cod" ~ "Small cod\nLarge cod",
                           TRUE ~ name))

# Panel A: posterior expected overlap per pair at average biomass density
# (mean_biom_sc = 0), with the raw observations jittered behind and a
# dashed reference line at an overlap of 0.6
p1 <- ovr %>%
  data_grid(name = unique(name), mean_biom_sc = 0) %>%
  mutate(name2 = case_when(name == "flounder_large_cod" ~ "Flounder\nLarge cod",
                           name == "flounder_small_cod" ~ "Flounder\nSmall cod",
                           name == "small_cod_large_cod" ~ "Small cod\nLarge cod",
                           TRUE ~ name)) %>%
  add_epred_draws(fit) %>%
  ggplot(aes(.epred, name2)) +
  stat_pointinterval(.width = c(.66, .95)) +
  geom_vline(xintercept = 0.6, linetype = 2, alpha = 0.6) +
  geom_jitter(data = ovr, aes(value, name2), height = 0.2, width = 0,
              alpha = 0.3, color = "grey50") +
  labs(y = "Schoener's overlap index", x = "") +
  geom_stripped_rows() +
  coord_cartesian(expand = 0, xlim = c(-0.03, 1)) +
  theme(plot.margin = unit(c(0, 0.3, 0, 0), "cm"))

pal <- brewer.pal(name = "RdBu", n = 7)

# Check CI's
# Posterior mean and an upper quantile of the small cod-large cod slope
# (reference slope plus its interaction contrast).
# NOTE(review): probs = 0.845 is an unusual quantile -- presumably probing
# where the interval crosses zero; confirm it is not a typo (e.g. 0.945/0.975).
fit %>%
  spread_draws(b_mean_biom_sc,
               `b_nameflounder_small_cod:mean_biom_sc`,
               `b_namesmall_cod_large_cod:mean_biom_sc`) %>%
  mutate("Flounder\nLarge cod" = b_mean_biom_sc,
         "Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
         "Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
         ) %>% 
  summarise(mean = mean(`Small cod\nLarge cod`),
            upr = quantile(`Small cod\nLarge cod`, probs = 0.845))
summarise: now one row and 2 columns, ungrouped
# A tibble: 1 × 2
    mean    upr
   <dbl>  <dbl>
1 -0.653 -0.167
# Panel B: posterior distributions of the biomass-density slope per overlap
# pair. Each pair's slope = reference slope (flounder-large cod) plus the
# relevant interaction contrast.
p2 <- fit %>%
  spread_draws(b_mean_biom_sc,
               `b_nameflounder_small_cod:mean_biom_sc`,
               `b_namesmall_cod_large_cod:mean_biom_sc`) %>%
  mutate("Flounder\nLarge cod" = b_mean_biom_sc,
         "Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
         "Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
         ) %>%
  pivot_longer(c(`Flounder\nLarge cod`, `Flounder\nSmall cod`, `Small cod\nLarge cod`),
               names_to = "Overlap group", values_to = "slope") %>% 
  # after_stat(x < 0) colours the negative part of each eye plot differently
  ggplot(aes(slope, `Overlap group`, fill = after_stat(x < 0))) +
  ggdist::stat_eye() +
  labs(x = "", y = "Slope of biomass density\non the mean Schoener's diet overlap") +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
  scale_fill_manual(values = c("gray80", alpha(pal[6], 0.8))) + 
  theme(legend.position.inside = c(0.15, 0.1)) + 
  guides(fill = guide_legend(position = "inside")) +
  geom_stripped_rows() +
  coord_cartesian(expand = 0) +
  NULL
pivot_longer: reorganized (Flounder
Large cod, Flounder
Small cod, Small cod
Large cod) into (Overlap group, slope) [was 8000x9, now 24000x8]
p2

# Panel C: conditional effect -- posterior mean overlap (with 95% ribbon) as a
# function of scaled biomass density, per overlap pair, observations as points
p3 <- ovr %>%
  data_grid(name = unique(name),
            mean_biom_sc = seq_range(mean_biom_sc, n = 101)) %>%
  mutate(name2 = case_when(name == "flounder_large_cod" ~ "Flounder\nLarge cod",
                           name == "flounder_small_cod" ~ "Flounder\nSmall cod",
                           name == "small_cod_large_cod" ~ "Small cod\nLarge cod",
                           TRUE ~ name)) %>%
  add_epred_draws(fit) %>%
  ggplot(aes(x = mean_biom_sc, y = value, fill = name2, color = name2)) +
  stat_lineribbon(aes(y = .epred), .width = c(.95), alpha = 1/4) +
  geom_point(data = ovr, alpha = 0.5) +
  scale_fill_brewer(palette = "Pastel1") +
  scale_color_brewer(palette = "Set1") +
  labs(y = "Schoener's overlap index",
       x = "Scaled cod and flounder mean biomass density",
       fill = "", color = "") +
  guides(fill = guide_legend(position = "inside")) +
  theme(legend.position.inside = c(0.90, 0.84))

# Combine: point-intervals (A) and slope posteriors (B) on top,
# conditional effect (C) below, with panel tags
pp <- (p1 + p2) + plot_layout(axes = "collect")

pp / p3 +
  plot_annotation(tag_levels = "A")

# Save the combined main-text figure
ggsave(paste0(home, "/figures/schoener_zib.pdf"), width = 17, height = 20, units = "cm")

# What's the actual predictions
# Posterior mean expected overlap per pair at average biomass (mean_biom_sc = 0).
# NOTE(review): the name `sum` shadows base::sum(); it works here, but renaming
# to e.g. `pred_sum` (also in the two uses below) would be safer.
sum <- ovr %>%
  data_grid(name = unique(name),
            mean_biom_sc = 0) %>%
  mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
         name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
         name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) %>% 
  add_epred_draws(fit) %>% 
  group_by(name) %>% 
  summarise(mean = mean(.epred))
summarise: now 3 rows and 2 columns, ungrouped
sum
# A tibble: 3 × 2
  name                  mean
  <chr>                <dbl>
1 flounder_large_cod  0.0733
2 flounder_small_cod  0.156 
3 small_cod_large_cod 0.301 
p1 + geom_vline(data = sum, aes(xintercept = mean))

knitr::knit_exit()